# GPU-accelerated inference

import torch

from transformers import GPT2Tokenizer, GPT2LMHeadModel



# Use the GPU when one is available, otherwise fall back to CPU so the
# script still runs on machines without CUDA.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the supervised-fine-tuned GPT-2 tokenizer and model from a local
# checkpoint directory.
tokenizer = GPT2Tokenizer.from_pretrained('./SFTgpt')
model = GPT2LMHeadModel.from_pretrained('./SFTgpt')
model.to(device)
model.eval()  # inference only: disable dropout

# Simple REPL: read a question, generate a continuation, print both.
while True:
    input_text = input("your question:")
    input_ids = tokenizer.encode(input_text, return_tensors='pt').to(device)
    # Generate without gradient tracking. Cap the number of NEW tokens
    # (the default max_length=20 counts the prompt and truncates answers)
    # and set pad_token_id explicitly — GPT-2 has no pad token, which
    # otherwise triggers a warning on every call.
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=128,
            pad_token_id=tokenizer.eos_token_id,
        )
    # skip_special_tokens=True drops <|endoftext|> during decoding.
    # The original decoded with special tokens kept and then called
    # .strip("<|endoftext|>"), but str.strip treats its argument as a
    # SET of characters — it would also eat genuine leading/trailing
    # text characters such as 't', 'e', '>' or '|'.
    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    # generate() echoes the prompt at the start of the output; slice it
    # off so only the model's continuation is shown.
    out = "my prank bot:" + generated_text[len(input_text):].strip()
    print(f"\033[33m my question: {input_text} \033[0m")
    print(f"\033[31m {out}\033[0m")